| convertToGFA |
16 |
- data/auxiliary/graphs/medaka/08/11.gfa
- data/auxiliary/graphs/medaka/08/13.gfa
- data/auxiliary/graphs/medaka/08/15.gfa
- data/auxiliary/graphs/medaka/08/17.gfa
- data/auxiliary/graphs/medaka/09/11.gfa
- data/auxiliary/graphs/medaka/09/13.gfa
- data/auxiliary/graphs/medaka/09/15.gfa
- data/auxiliary/graphs/medaka/09/17.gfa
- data/auxiliary/graphs/nanopolish/08/11.gfa
- data/auxiliary/graphs/nanopolish/08/13.gfa
- data/auxiliary/graphs/nanopolish/08/15.gfa
- data/auxiliary/graphs/nanopolish/08/17.gfa
- data/auxiliary/graphs/nanopolish/09/11.gfa
- data/auxiliary/graphs/nanopolish/09/13.gfa
- data/auxiliary/graphs/nanopolish/09/15.gfa
- data/auxiliary/graphs/nanopolish/09/17.gfa
|
|
|
| python3 scripts/convertToGFA.py {input} {output} {params.k} 2> {log}
|
|
| overabundance_analysis |
4 |
- data/output/softClippedSeqs/medaka/08.html
- data/output/softClippedSeqs/medaka/08.json
- data/auxiliary/softClippedSeqs/medaka/08.corrected.fasta
- data/output/softClippedSeqs/medaka/09.html
- data/output/softClippedSeqs/medaka/09.json
- data/auxiliary/softClippedSeqs/medaka/09.corrected.fasta
- data/output/softClippedSeqs/nanopolish/08.html
- data/output/softClippedSeqs/nanopolish/08.json
- data/auxiliary/softClippedSeqs/nanopolish/08.corrected.fasta
- data/output/softClippedSeqs/nanopolish/09.html
- data/output/softClippedSeqs/nanopolish/09.json
- data/auxiliary/softClippedSeqs/nanopolish/09.corrected.fasta
|
|
|
| fastp -i {input} -o {output.fixedFasta} -p -j {output.reportjson} -h {output.reporthtml}
|
|
| plotKmerHistogram |
16 |
- data/output/kmerHistograms/medaka/08_11.svg
- data/output/kmerHistograms/medaka/08_13.svg
- data/output/kmerHistograms/medaka/08_15.svg
- data/output/kmerHistograms/medaka/08_17.svg
- data/output/kmerHistograms/medaka/09_11.svg
- data/output/kmerHistograms/medaka/09_13.svg
- data/output/kmerHistograms/medaka/09_15.svg
- data/output/kmerHistograms/medaka/09_17.svg
- data/output/kmerHistograms/nanopolish/08_11.svg
- data/output/kmerHistograms/nanopolish/08_13.svg
- data/output/kmerHistograms/nanopolish/08_15.svg
- data/output/kmerHistograms/nanopolish/08_17.svg
- data/output/kmerHistograms/nanopolish/09_11.svg
- data/output/kmerHistograms/nanopolish/09_13.svg
- data/output/kmerHistograms/nanopolish/09_15.svg
- data/output/kmerHistograms/nanopolish/09_17.svg
|
|
- biopython=1.71
- python=3.7
- regex
- openssl = 1.0
- samtools = 1.9
- seaborn = 0.9.0
- scipy = 1.2.1
- pysam = 0.15.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39 | import matplotlib.pyplot as plt
import numpy as np
import scipy.signal
import json
kmer_counts = json.load(open(snakemake.input['kmerProfile'],'r'))
uniqueKmersPerK = json.load(open(snakemake.input['uniqueKmers'],'r'))
k= snakemake.params['k']
plt.rcParams['figure.figsize'] = [16, 24]
fig,axs = plt.subplots(2)
kmer_counts_list = [kmer_counts[kmer] for kmer in kmer_counts]
b = np.amax(kmer_counts_list)
hist,_,_ = axs[0].hist(kmer_counts_list, bins=b)
axs[0].set_title('raw k-mer histogram')
local_mins = scipy.signal.find_peaks_cwt(-hist,np.arange(30,70))
threshold = local_mins[0]
# Create a list of k-mers that occur more frequent than the cutoff-point
filteredKmers = [kmer for kmer in kmer_counts if kmer_counts[kmer] > threshold]
kmer_counts_list_filtered = [kmer_counts[kmer] for kmer in filteredKmers]
b = np.arange(np.amax(kmer_counts_list_filtered) + 1)
hist_filtered, _, _ = axs[1].hist(kmer_counts_list_filtered, bins=b)
# Compare to expected value
axs[1].set_title(
'error-threshold: {} | k: {} | unique k-mers: {}/{} expected'.format(threshold,k,len(filteredKmers),uniqueKmersPerK[k])
,fontsize=9)
fig.text(0.5,0.04,'k-mer frequency',ha='center')
fig.text(0.04,0.5,'k-mer count',va='center',rotation='vertical')
plt.savefig(snakemake.output[0])
|
|
| detectExpectedUniqueKmers |
1 |
- data/output/tobigram.svg
- data/auxiliary/uniqueKmers.json
|
|
- biopython=1.71
- python=3.7
- regex
- openssl = 1.0
- samtools = 1.9
- seaborn = 0.9.0
- scipy = 1.2.1
- pysam = 0.15.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49 | from Bio import SeqIO
from shared import *
import matplotlib.pyplot as plt
import json
x = []
y = []
uniqueKmersPerK = {}
# Read reference
reference = SeqIO.read(snakemake.input[0],'fasta')
ks = snakemake.params['krange']
for k in ks:
# Create an empty dictionary to score kmer counts (how often each kmer is observed)
kmers_reference = {}
parseKmers(kmers_reference,reference,k)
ambiguousKmers = sum(1 for kmer in kmers_reference if kmers_reference[kmer] > 1)
totalKmers = sum(1 for kmer in kmers_reference)
ratio = ambiguousKmers/totalKmers
uniqueKmersPerK[k] = totalKmers-ambiguousKmers
duplicates = sum(1 for kmer in kmers_reference if kmers_reference[kmer] == 2)
print(k,totalKmers,duplicates)
x.append(k)
y.append(ratio)
# plot tobigram
plt.rcParams['figure.figsize'] = [10, 5]
plt.plot(x,y)
plt.xlabel('k')
plt.ylabel('fraction of ambiguous k-mers')
plt.savefig(snakemake.output['tobigram'])
with open(snakemake.output['uniqueKmers'],'w') as outfile:
json.dump(uniqueKmersPerK,outfile)
|
|
| bCalm |
16 |
- data/auxiliary/graphs/medaka/08/11.unitigs.fa
- data/auxiliary/graphs/medaka/08/13.unitigs.fa
- data/auxiliary/graphs/medaka/08/15.unitigs.fa
- data/auxiliary/graphs/medaka/08/17.unitigs.fa
- data/auxiliary/graphs/medaka/09/11.unitigs.fa
- data/auxiliary/graphs/medaka/09/13.unitigs.fa
- data/auxiliary/graphs/medaka/09/15.unitigs.fa
- data/auxiliary/graphs/medaka/09/17.unitigs.fa
- data/auxiliary/graphs/nanopolish/08/11.unitigs.fa
- data/auxiliary/graphs/nanopolish/08/13.unitigs.fa
- data/auxiliary/graphs/nanopolish/08/15.unitigs.fa
- data/auxiliary/graphs/nanopolish/08/17.unitigs.fa
- data/auxiliary/graphs/nanopolish/09/11.unitigs.fa
- data/auxiliary/graphs/nanopolish/09/13.unitigs.fa
- data/auxiliary/graphs/nanopolish/09/15.unitigs.fa
- data/auxiliary/graphs/nanopolish/09/17.unitigs.fa
|
|
|
| bcalm -in {input} -kmer-size {params.k} -out {params.outprefix} -abundance-min 50 2> {log}
|
|
| addPseudoQualities |
4 |
- data/auxiliary/softClippedSeqs/medaka/08.fq
- data/auxiliary/softClippedSeqs/medaka/09.fq
- data/auxiliary/softClippedSeqs/nanopolish/08.fq
- data/auxiliary/softClippedSeqs/nanopolish/09.fq
|
|
|
| perl scripts/fasta_to_fastq.pl {input} > {output}
|
|
| createKmerProfiles |
16 |
- data/auxiliary/kmerProfiles/medaka/08_11.json
- data/auxiliary/kmerProfiles/medaka/08_13.json
- data/auxiliary/kmerProfiles/medaka/08_15.json
- data/auxiliary/kmerProfiles/medaka/08_17.json
- data/auxiliary/kmerProfiles/medaka/09_11.json
- data/auxiliary/kmerProfiles/medaka/09_13.json
- data/auxiliary/kmerProfiles/medaka/09_15.json
- data/auxiliary/kmerProfiles/medaka/09_17.json
- data/auxiliary/kmerProfiles/nanopolish/08_11.json
- data/auxiliary/kmerProfiles/nanopolish/08_13.json
- data/auxiliary/kmerProfiles/nanopolish/08_15.json
- data/auxiliary/kmerProfiles/nanopolish/08_17.json
- data/auxiliary/kmerProfiles/nanopolish/09_11.json
- data/auxiliary/kmerProfiles/nanopolish/09_13.json
- data/auxiliary/kmerProfiles/nanopolish/09_15.json
- data/auxiliary/kmerProfiles/nanopolish/09_17.json
|
|
- biopython=1.71
- python=3.7
- regex
- openssl = 1.0
- samtools = 1.9
- seaborn = 0.9.0
- scipy = 1.2.1
- pysam = 0.15.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 | from Bio import SeqIO
from shared import *
import json
scReads = SeqIO.parse(snakemake.input[0],'fasta')
k = snakemake.params['k']
kmers = {}
for read in scReads:
parseKmers(kmers,read.seq,k)
with open(snakemake.output[0],'w') as outfile:
json.dump(kmers,outfile)
|
|
| extractSoftClippedReads |
4 |
- data/auxiliary/softClippedSeqs/medaka/08.fasta
- data/auxiliary/softClippedSeqs/medaka/09.fasta
- data/auxiliary/softClippedSeqs/nanopolish/08.fasta
- data/auxiliary/softClippedSeqs/nanopolish/09.fasta
|
|
- biopython=1.71
- python=3.7
- regex
- openssl = 1.0
- samtools = 1.9
- seaborn = 0.9.0
- scipy = 1.2.1
- pysam = 0.15.2
|
1
2
3
4
5
6
7
8
9
10
11
12
13 | import pysam
from Bio import Seq,SeqIO,SeqRecord
alignment = pysam.AlignmentFile(snakemake.input['alignment'],'rb')
records = []
for segment in alignment.fetch():
seq = Seq.Seq(segment.query_alignment_sequence)
rec = SeqRecord.SeqRecord(seq, segment.qname, "", "")
records.append(rec)
with open(snakemake.output[0],'w') as outfile:
SeqIO.write(records,outfile,'fasta')
|
|
| index_bwa |
4 |
- data/input/result_hac/barcode08.medaka.primertrimmed.sorted.bam.bai
- data/input/result_hac/barcode09.medaka.primertrimmed.sorted.bam.bai
- data/input/result_hac/barcode08.nanopolish.primertrimmed.sorted.bam.bai
- data/input/result_hac/barcode09.nanopolish.primertrimmed.sorted.bam.bai
|
|
- biopython=1.71
- python=3.7
- regex
- openssl = 1.0
- samtools = 1.9
- seaborn = 0.9.0
- scipy = 1.2.1
- pysam = 0.15.2
|
|